/*
This file cleans data for descriptives on migration behavior

*/
* ---- Build the person-by-year panel --------------------------------------
* Load the individual file, attach every family-wave file by that wave's
* interview number, then reshape the wave-suffixed variables to long format.
use "U:\dtafiles\ind_head_spouse.dta", clear
rename age* ageind*

* Waves with a two-digit suffix that needs no zero padding (merge order kept).
foreach wave of numlist 69/97 99 11 13 15 17 19 {
	merge m:1 intnum`wave' using "U:\dtafiles\fam`wave'.dta", nogenerate
}

* Waves whose suffix is zero padded (01 ... 09).
foreach wave in 01 03 05 07 09 {
	merge m:1 intnum`wave' using "U:\dtafiles\fam`wave'.dta", nogenerate
}

* Rows that came only from a family file carry no person identifier.
keep if ER30002 != .

* Stubs that are reshaped to long format below.
local stubs intnum seq relhead age sex agespouse ageind educ race racespouse state maritalstatus inhome husband_inc wife_inc husband_weeks husband_hours wife_weeks wife_hours

* Strip the leading zero from the 01-09 suffixes so that j(year) is numeric.
foreach wave in 1 3 5 7 9 {
	foreach stub of local stubs {
		rename `stub'0`wave' `stub'`wave'
	}
}

* Free the name sex for the wave-varying stub; the unsuffixed variable is kept as sex_ind.
rename sex sex_ind
reshape long `stubs', i(intnum68 ER30002) j(year)

* Two-digit suffix to calendar year: above 60 is 19xx, below 60 is 20xx.
replace year = year + cond(year > 60, 1900, 2000) if year != 60

* Drop person-years with interview number 0 and everything before 1970.
drop if intnum == 0 | year < 1970

*** Hours Variables
* Blank out implausible reports: hours above 112 (checked from 1994 onward
* only) and weeks above 52.
foreach sp in wife husband {
	replace `sp'_hours = . if `sp'_hours > 112 & year >= 1994
}
foreach sp in husband wife {
	replace `sp'_weeks = . if `sp'_weeks > 52
}

* Annual hours = hours x weeks.
foreach sp in husband wife {
	g `sp'_annual_hours = `sp'_hours*`sp'_weeks
}

* The hours variable is carried forward unchanged as weekly hours.
foreach sp in husband wife {
	g `sp'_weekly_hours = `sp'_hours
}

* Full time: 37.5 hours or more; left missing when hours are missing.
foreach sp in husband wife {
	g `sp'_FT = `sp'_weekly_hours >= 37.5 if `sp'_weekly_hours != .
}

* Part time: at least 1 but under 37.5 hours; left missing when hours are missing.
foreach sp in husband wife {
	g `sp'_PT = (`sp'_weekly_hours >= 1 & `sp'_weekly_hours < 37.5) if `sp'_weekly_hours != .
}


* Respondent location, keyed on interview number and year. Rows that exist
* only in the location file are not kept.
merge m:1 intnum year using "U:\dtafiles\state_county_clean.dta", keep(master match) nogenerate

foreach geo in state_fips county_fips statecounty {
	rename `geo' resp_`geo'
}

* Park the respondent's own identifiers so the ID names are free for the parents.
rename ER30001 resp_ER30001
rename ER30002 resp_ER30002

*** Merge on mother's location, then on father's location
* Each pass temporarily renames the parent's identifiers to the key names used
* by the location file, merges, and then tags everything with the parent prefix.
foreach parent in mother father {
	rename `parent'_ER30001 ER30001
	rename `parent'_ER30002 ER30002
	merge m:1 ER30001 ER30002 year  using "U:\dtafiles\indid_famid_location.dta", keep(master match)

	foreach geo in state_fips county_fips statecounty {
		rename `geo' `parent'_`geo'
	}

	rename ER30001 `parent'_ER30001
	rename ER30002 `parent'_ER30002
	rename _merge `parent'_merge
}

 * Flag heads and spouses by their relationship-to-head code.
 g headspouse = inlist(relhead, 1, 2, 10, 20, 22)

* Keep heads/spouses only, and only sequence numbers up to 2 (i.e. not a
* person moving out of a HH in the same year).
keep if headspouse == 1 & seq <= 2

* Running index of the person within the household-year (1, 2, ...).
bys intnum year: g numper = _n




*** Assign the parent location to the whole household-year, taken from whichever
*** spouse has a parent location attached (it is never both spouses, because
*** that would require two PSID respondents married to each other).

* Household-level parent location: minimum over the household members, which
* picks up the one non-missing value when only one spouse has it.
bys intnum year: egen mother_statecountyadj = min(mother_statecounty)
bys intnum year: egen father_statecountyadj = min(father_statecounty)

* sex_ind of the household member whose mother / father location is attached
* (0 when nobody in the household has one).
bys intnum year: egen mother_gender= max(sex_ind*(mother_statecounty != .))
bys intnum year: egen father_gender = max(sex_ind*(father_statecounty != .))

bys intnum year: egen mother_stateadj = min(mother_state_fips)
bys intnum year: egen father_stateadj = min(father_state_fips)

* Same-county / same-state indicators.
* Stata evaluates (. == .) as true, so a household with no recorded location
* of its own and no recorded parent location would otherwise be flagged as
* living with the parent. Requiring the respondent's own code to be
* non-missing makes those rows 0, the same value rows with an unknown parent
* location already receive.
g same_county_parent = (resp_statecounty == mother_statecountyadj | resp_statecounty == father_statecountyadj) & !missing(resp_statecounty)
g same_county_mother = resp_statecounty == mother_statecountyadj & !missing(resp_statecounty)
g same_county_father = resp_statecounty == father_statecountyadj & !missing(resp_statecounty)

g same_state_parent = (resp_state_fips == mother_stateadj | resp_state_fips == father_stateadj) & !missing(resp_state_fips)
g same_state_mother = resp_state_fips == mother_stateadj & !missing(resp_state_fips)
g same_state_father = resp_state_fips == father_stateadj & !missing(resp_state_fips)


* 1 in the calendar year of the first birth.
g firstbirthind = year == year_firstchild

* Re-express the husband/wife measures as _f / _m measures.
* When sex == 1 the wife_ variable feeds _f and the husband_ variable feeds _m;
* when sex == 2 the assignment is reversed. Any other value of sex gives missing.
foreach pair in "inc inc" "annualhrs annual_hours" "FTemp FT" "PTemp PT" {
	local out : word 1 of `pair'
	local src : word 2 of `pair'
	g `out'_f = cond(sex == 1, wife_`src', cond(sex == 2, husband_`src', .))
	g `out'_m = cond(sex == 1, husband_`src', cond(sex == 2, wife_`src', .))
}

* Race indicators for the _f person: taken from racespouse when sex == 1 and
* racespouse is recorded, and from race when racespouse is missing or sex == 2.
foreach spec in "white 1" "black 2" {
	local grp  : word 1 of `spec'
	local code : word 2 of `spec'
	g `grp'_f = cond(racespouse == . | sex == 2, race == `code', cond(sex == 1, racespouse == `code', .))

}

* Education indicators; educ of 0 or above 90 (which includes missing) is
* treated as unknown and left missing.
g college = educ >= 16 if educ <= 90 & educ != 0

g hsdegree = inrange(educ, 12, 15) if educ <= 90 & educ != 0

* cpigen is a user-written command (not shown here); the lines below rely on
* it having created cpiu. The deflator is filled in by hand for recent years.
cpigen
replace cpiu = 1.352828 if year == 2013
replace cpiu = 1.376405 if year == 2015
replace cpiu = 1.423461 if year == 2017
replace cpiu = 1.484652 if year == 2019


* CPI-deflated income.
g inc_f_adj = inc_f/cpiu
g inc_m_adj = inc_m/cpiu


* Age of the _f person: agespouse when sex == 1, age when sex == 2.
g age_f = agespouse if sex == 1
replace age_f = age if sex == 2

* Drop income outliers. Missing compares larger than any number in Stata, so
* the unguarded test (inc_f > 1500000) also deleted every row with missing
* inc_f, which contradicts the missing-handling on inc_f_pos just below.
* Only genuinely reported values above the cap are dropped now.
drop if inc_f > 1500000 & !missing(inc_f)
g inc_f_pos = inc_f > 100
replace inc_f_pos = . if inc_f == .

* Age restriction; rows with missing age_f fail the upper bound and are dropped.
keep if age_f >= 18 & age_f <= 55

* Event window: five years before to ten years after the first birth.
g year_fiveprebirth = year_firstchild -5
g year_tenpostbirth = year_firstchild+10

g insample = (year>= year_fiveprebirth & year<= year_tenpostbirth)
*keep if insample == 1
** Counting number of years
* uniqid identifies a person (one value per resp_ER30001 x resp_ER30002 pair).
* nyears = number of distinct calendar years in which the person is observed:
*   step 1 tags the first row of every person-year,
*   step 2 turns the tags into a running count within person,
*   step 3 copies the final running count to every row of the person.
* The steps rely on the sort order left by the preceding bys, so keep them together.
egen uniqid = group(resp_ER30001 resp_ER30002)
bys uniqid year: gen nyears = _n == 1
 by uniqid: replace nyears = sum(nyears)
 by uniqid: replace nyears = nyears[_N]
 
 ** Pre-period: the five years before the first birth (birth year excluded).
 ** Post-period: the ten years after the first birth (birth year excluded).
 g preperiod = (year>= year_fiveprebirth & year< year_firstchild)
 g postperiod =(year> year_firstchild & year<= year_tenpostbirth)
 
 * npreyears = number of distinct years observed inside the pre-period.
 * Rows outside the period start as missing; sum() treats missing as zero,
 * and sorting on preperiod puts the in-period rows last within person, so
 * the last row holds the person's total.
 bys uniqid preperiod year: gen npreyears = _n == 1 if preperiod == 1
 by uniqid : replace npreyears = sum(npreyears)
 by uniqid : replace npreyears = npreyears[_N]

  * npostyears = same construction for the post-period.
  bys uniqid postperiod year: gen npostyears = _n == 1 if postperiod == 1
 by uniqid : replace npostyears = sum(npostyears)
 by uniqid : replace npostyears = npostyears[_N]
 
 
 * Alternative sample definitions based on how many years are observed overall
 * and on each side of the birth.
 g insample2a = (nyears >= 8 & npreyears > 1 & npostyears > 1)
 g insample2 = (nyears >= 8 & npreyears > 2 & npostyears > 1)
  g insample4 = (nyears >= 12 & npreyears > 1 & npostyears > 1)
 * Event-time indicators relative to the year of the first birth.
 g event0 = (year_firstchild == year)

 * eventpreK: K years before the birth.
 forvalues k = 1/5 {
 	g eventpre`k' = (year_firstchild - year == `k')
 }

 * eventpostK: K years after the birth.
 forvalues k = 1/10 {
 	g eventpost`k' = (year - year_firstchild == `k')
 }

 * Keep the true two-years-before indicator under a separate name, then set
 * eventpre2 itself to 1 on every row.
 g eventpre_orig = eventpre2
 replace eventpre2 = 1

 * Age in the year of the first birth (year - age is the implied birth year).
 g age_firstchild = year_firstchild - (year - age)
  
 * Person-level characteristics measured in the first-birth year (event0 == 1).
 * Pattern: set 1 on the birth-year row when the condition holds, sort the 1
 * to the top within person and copy it to all rows, then fill the rest with 0.

 * Same county as a parent at first birth.
 g grandparent_birth = 1 if same_county_parent == 1 & event0 == 1
 bysort uniqid (grandparent_birth): replace grandparent_birth = grandparent_birth[1]
 replace grandparent_birth = 0 if grandparent_birth == .

 * Not in the same state as a parent at first birth.
 g nograndparent_birth = 1 if same_state_parent == 0 & event0 == 1
 bysort uniqid (nograndparent_birth): replace nograndparent_birth = nograndparent_birth[1]
 replace nograndparent_birth = 0 if nograndparent_birth == .

 * maritalstatus equal to 1 at first birth.
 g married_birth = 1 if maritalstatus == 1 & event0 == 1
 bysort uniqid (married_birth): replace married_birth = married_birth[1]
 replace married_birth = 0 if married_birth == .

 * Flags for households with no parent county attached.
 g mother_miss = (mother_statecountyadj == .)
 * replace mother_miss = 1 if mother_ER30002 > 900
 g father_miss = (father_statecountyadj == .)
 *replace father_miss = 1 if father_ER30002 > 900

 * Own deflated income: the _m series when sex_ind == 1, the _f series when sex_ind == 2.
 g inc_adj = cond(sex_ind == 1, inc_m_adj, cond(sex_ind == 2, inc_f_adj, .))
 
 
 
 * Distance from the respondent's county to each parent's county, looked up in
 * a county-pair distance file keyed on (county1, county2). The respondent and
 * parent county variables are renamed to the key names for each lookup and
 * renamed back afterwards.
 rename resp_statecounty county1
 rename mother_statecountyadj  county2
 merge m:1 county1 county2 using "U:\dtafiles\dist_counties.dta"
 drop if _merge == 2
 * Diagnostic: merge result against whether a mother location exists at all.
 tab _merge mother_miss
 
 rename mi_to_county dist_mother
 rename county2 mother_statecountyadj
 * Same county means zero distance. Two missing county codes compare as equal
 * in Stata, so the respondent's code must be non-missing; otherwise rows
 * with no location information on either side would get a distance of 0.
 replace dist_mother = 0 if county1 == mother_statecountyadj & !missing(county1)
 drop _merge
 
 rename father_statecountyadj  county2
 merge m:1 county1 county2 using "U:\dtafiles\dist_counties.dta"
 drop if _merge == 2
 * Diagnostic: merge result against whether a father location exists at all.
 tab _merge father_miss
 
 rename mi_to_county dist_father
 rename county2 father_statecountyadj
 rename county1 resp_statecounty
 drop _merge

 * Same guard as for the mother: only a known, identical county counts as distance 0.
 replace dist_father = 0 if resp_statecounty == father_statecountyadj & !missing(resp_statecounty)
 
 * Within 50 / 100 miles of either parent. min() ignores a missing argument,
 * and a missing minimum fails the comparison, so unknown distances give 0.
 g lessthan50 = min(dist_mother, dist_father) <= 50
 g lessthan100 = min(dist_mother, dist_father) <= 100

 * Person-level versions measured in the first-birth year: flag the birth-year
 * row, sort the flag to the top within person, copy it down, fill with 0.
 g lessthan100_birth = 1 if lessthan100 == 1 & event0 == 1
 bysort uniqid (lessthan100_birth): replace lessthan100_birth = lessthan100_birth[1]
 replace lessthan100_birth = 0 if lessthan100_birth == .

 g lessthan50_birth = 1 if lessthan50 == 1 & event0 == 1
 bysort uniqid (lessthan50_birth): replace lessthan50_birth = lessthan50_birth[1]
 replace lessthan50_birth = 0 if lessthan50_birth == .

 * Distance to the nearer parent (missing only when both distances are missing).
 g mindistparent = min(dist_mother, dist_father)

 * Nearest-parent distance in the birth year, spread to all rows of the person.
 g dist_birth = mindistparent if event0 == 1
 bysort uniqid (dist_birth): replace dist_birth = dist_birth[1]

 * Same, measured two years before the birth (eventpre_orig).
 g dist_birthpre1 = mindistparent if eventpre_orig == 1
 bysort uniqid (dist_birthpre1): replace dist_birthpre1 = dist_birthpre1[1]
 
* Flag the value 999 in the distance at birth.
* NOTE(review): the variable name suggests 999 codes "more than 500 miles" in
* the distance file - confirm against the file's documentation.
g greaterthan500_birth = dist_birth == 999

 * Distance bands at first birth:
 *   1 = same county (distance 0)
 *   2 = more than 0 and under 25 miles
 *   3 = 25 to under 50 miles
 *   4 = 50 miles or more
 * Lower bounds are inclusive so distances of exactly 25, 50 or 100 miles are
 * no longer left uncategorised, and the top band excludes missing distances
 * (missing compares larger than any number, so an unguarded "greater than"
 * test would place unknown distances in the farthest band).
 * NOTE(review): both of the earlier rules above 50 miles assigned code 4, so
 * code 4 is kept for the whole top band; split it if a separate 100+ band
 * was intended.
 g dist_cat = .
 replace dist_cat = 1 if dist_birth == 0
 replace dist_cat = 2 if dist_birth > 0 & dist_birth < 25
 replace dist_cat = 3 if dist_birth >= 25 & dist_birth < 50
 replace dist_cat = 4 if dist_birth >= 50 & !missing(dist_birth)

	** Weighting for near and far from  grandparent ( defined as < 25 miles)
	* A missing dist_cat fails the comparison, so unknown distances are coded 0 (far).
		 g near_indicator = dist_cat <= 2
		/* 
g emp_pre2 = inc_f_adj > 500 if  eventpre_orig == 1
replace emp_pre2 = . if inc_f_adj == .  & eventpre_orig == 1
 bysort uniqid (emp_pre2 ): replace emp_pre2= emp_pre2[1]
	g emp_pre1 = inc_f_adj > 500 if  eventpre1 == 1
 bysort uniqid (emp_pre1 ): replace emp_pre1 = emp_pre1[1] 
 g emp_pre3= inc_f_adj > 500 if  eventpre3 == 1
 bysort uniqid (emp_pre3 ): replace emp_pre3 = emp_pre3[1] 
 
 
 g miss_emppre1 = (emp_pre1 == .)
 replace emp_pre1 = 99 if emp_pre1 ==.
  
 
 g miss_emppre2 = (emp_pre2 == .)
 replace emp_pre2 = -1 if emp_pre2 ==.
 
  
 
 g miss_emppre3 = (emp_pre3 == .)
 replace emp_pre3 = 99 if emp_pre3 ==.
 */
 
* Log deflated _f income measured before the first birth, spread to every row
* of the person: lag 2 uses eventpre_orig (two years before), lag 1 uses
* eventpre1. A missing value is replaced by the placeholder 999999 and
* recorded in a companion miss_ flag.
foreach lag in 2 1 {
	local flag = cond(`lag' == 2, "eventpre_orig", "eventpre1")

	g inc_f_prebirth`lag' = log(inc_f_adj) if `flag' == 1
	bysort uniqid (inc_f_prebirth`lag'): replace inc_f_prebirth`lag' = inc_f_prebirth`lag'[1]

	g miss_preincf`lag' = (inc_f_prebirth`lag' == .)
	replace inc_f_prebirth`lag' = 999999 if miss_preincf`lag' == 1
}

 *g inc_prebirth = inc_adj 	 if  eventpre_orig == 1
 *bysort uniqid (inc_f_prebirth ): replace inc_prebirth = inc_prebirth[1]

 *g miss_preinc = inc_prebirth == .
 *replace inc_prebirth = 999999 if miss_preinc == 1
 /*
 g inc_f_prebirth3 = log(inc_f_adj) 	 if  eventpre3 == 1
 bysort uniqid (inc_f_prebirth3 ): replace inc_f_prebirth3 = inc_f_prebirth3[1]
 
 g miss_preincf3 = inc_f_prebirth3 == .
 replace inc_f_prebirth3 = 999999 if miss_preincf3 == 1
*/
 
 /*
 
 g inc_prebirth1 = inc_adj 	 if  eventpre1== 1
 bysort uniqid (inc_f_prebirth1 ): replace inc_prebirth1 = inc_prebirth1[1]
 
 g miss_preinc1 = inc_prebirth1 == .
 replace inc_prebirth1 = 999999 if miss_preinc1 == 1
 */
 * Missing-value flags for the covariates; the covariate itself is set to the
 * placeholder 99 wherever it was missing.
 * NOTE(review): white and black are not created under those exact names in
 * this script; they presumably resolve to white_f and black_f through
 * variable-name abbreviation - confirm, and spell the names out if so.
 local covars white black college hsdegree
 local flags  miss_white miss_black miss_college miss_hs
 forvalues i = 1/4 {
 	local cov  : word `i' of `covars'
 	local flag : word `i' of `flags'
 	g `flag' = (`cov' == .)
 	replace `cov' = 99 if `flag' == 1
 }
 
 
	 * Propensity of living near a parent at first birth, then weights equal to
	 * mean(p)/p for the near group and (1 - mean(p))/(1 - p) for the far group.
	 probit near_indicator married  college hsdegree white black   inc_f_prebirth* age_firstchild miss*
	 predict pi

	 * r(mean) from this summarize is the mean predicted probability used below.
	 summ pi
	 g grandparent_weight = cond(near_indicator == 1, `r(mean)'/pi, cond(near_indicator == 0, (1-`r(mean)')/(1-pi), .))
	 
	 
	 
	 
	*** Child Care Measures 
 * County-level childcare measures, attached by the respondent's county.
 merge m:1 resp_statecounty using "U:\dtafiles\childcare_clean.dta"
 drop if _merge == 2
 drop _merge
 
 * High-cost indicator: CC_centile of 75 or above. Missing compares larger than
 * any number in Stata, so without the guard every county lacking childcare
 * data was coded as high cost; those rows are now left missing and drop out
 * of the probit and the weights below.
  g above75CC = CC_centile >= 75 if !missing(CC_centile)
 g CC_in10s = MCToddler/10
 
 

 ** Weighting for high vs low CC
 * Propensity of being in a high-cost county, then weights equal to mean(p)/p
 * for the high group and (1 - mean(p))/(1 - p) for the low group.
 probit above75CC married  college hsdegree white black  inc_f_prebirth* age_firstchild miss*
	 predict pi2
	 
	 g cc_weight =.
	 * r(mean) from this summarize is the mean predicted probability used below.
	 summ pi2
	 replace cc_weight = `r(mean)'/pi2 if above75CC == 1
	 replace cc_weight = (1-`r(mean)')/(1-pi2) if above75CC == 0
 
	 
*** Migration
* A move is a change in location relative to the person's previous observed
* row. The indicator is missing when either location is missing, which
* includes each person's first row (there is no previous row).
sort uniqid year
by uniqid: g move_state = resp_state_fips != resp_state_fips[_n-1] if resp_state_fips != . & resp_state_fips[_n-1] != .
by uniqid: g move_statecounty = resp_statecounty != resp_statecounty[_n-1] if resp_statecounty != . & resp_statecounty[_n-1] != .

* Move away from home: a state move out of a row that was in a parent's state.
by uniqid: g move_fromhome = (move_state == 1 & same_state_parent[_n-1] == 1)
* Move to home: a state move that ends in a parent's state.
g move_tohome = (move_state == 1 & same_state_parent == 1)


* Person-level totals of each kind of move.
bysort uniqid: egen totalmoves = total(move_state)
bysort uniqid: egen totalmoveshome = total(move_tohome)
bysort uniqid: egen totalmovesfromhome = total(move_fromhome)

*** Types of moves with reference to home
* 2 = away from home (takes precedence), 1 = to home, 3 = any other state
* move, 4 = everything else (no move, or move status unknown).
  g movetype = cond(move_fromhome == 1, 2, cond(move_tohome == 1, 1, cond(move_state == 1, 3, 4)))
  
  *** Life era relative to age of first child
  * agechild = years since the first birth (negative before the birth).
  g agechild = year- year_firstchild
  * year_firstchild of 9999 marks a person who never has a child.
  g nochild = (year_firstchild == 9999)

  * childstatus:
  *   1 = before first child (7 to 2 years before)
  *   2 = pregnancy through age 4 (1 year before to 4 years after)
  *   3 = 5 to 10 years after
  *   4 = 11 to 15 years after
  *   5 = 16 to 20 years after
  *   6 = never has a child
  * Anything outside these ranges stays missing.
  g childstatus = cond(nochild == 1, 6, ///
                  cond(agechild >= -7 & agechild < -1, 1, ///
                  cond(agechild >= -1 & agechild <= 4, 2, ///
                  cond(agechild > 4 & agechild <= 10, 3, ///
                  cond(agechild > 10 & agechild <= 15, 4, ///
                  cond(agechild > 15 & agechild <= 20, 5, .))))))

  * Running row index within person and life period.
  bysort uniqid childstatus: g numofyears = _n
  * Move counts within person and life period: all state moves, moves to
  * home, and moves away from home.
  bysort uniqid childstatus: egen totalmove_lifeperiod = total(move_state)
  bysort uniqid childstatus: egen totalmovetohome_lifeperiod = total(move_tohome)
  bysort uniqid childstatus: egen totalmovefromhome_lifeperiod = total(move_fromhome)
  
  
  cap drop temp

  * Person-level flags. Each is the within-person maximum of a row-level
  * condition, so it equals 1 on every row of a person who meets the condition
  * in at least one year. Suffixes: _yc = young-child period (childstatus 2),
  * _bc = before child (childstatus 1), _ac = after young child (childstatus 3).

  * Never moved across states during the period.
  bys uniqid: egen nevermove_yc = max(totalmove_lifeperiod == 0 & childstatus == 2)
  bys uniqid: egen nevermove_bc = max(totalmove_lifeperiod == 0 & childstatus == 1)
  bys uniqid: egen nevermove_ac = max(totalmove_lifeperiod == 0 & childstatus == 3)

  * Young-child period: every move was a move home.
  bys uniqid: egen onlymovehome_yc = max(totalmovetohome_lifeperiod > 0 & totalmovetohome_lifeperiod == totalmove_lifeperiod & childstatus == 2)
  * Young-child period: moved home and also made other moves.
  bys uniqid: egen movehomeplus_yc = max(totalmovetohome_lifeperiod > 0 & totalmovetohome_lifeperiod < totalmove_lifeperiod & childstatus == 2)
  * Young-child period: every move was a move away from home.
  bys uniqid: egen onlymoveaway_yc = max(totalmovefromhome_lifeperiod > 0 & totalmovefromhome_lifeperiod == totalmove_lifeperiod & childstatus == 2)
  * Young-child period: moved away from home and also made other moves.
  bys uniqid: egen moveawayplus_yc = max(totalmovefromhome_lifeperiod > 0 & totalmovefromhome_lifeperiod < totalmove_lifeperiod & childstatus == 2)
  * Young-child period: moved, but only between non-home locations.
  bys uniqid: egen moveother_yc = max(totalmovefromhome_lifeperiod == 0 & totalmovetohome_lifeperiod == 0 & totalmove_lifeperiod > 0 & childstatus == 2)

  * Ever observed in each period.
  bys uniqid: egen ever_yc = max(childstatus == 2)
  bys uniqid: egen ever_bc = max(childstatus == 1)
  bys uniqid: egen ever_ac = max(childstatus == 3)

  * Young-child period: ever in / ever not in a parent's state.
  bys uniqid: egen inhome_yc = max(same_state_parent == 1 & childstatus == 2)
  bys uniqid: egen outhome_yc = max(same_state_parent == 0 & childstatus == 2)

  * Years spent in a parent's state: over all rows, and within each life period.
  bys uniqid: egen numyearshome = total(same_state_parent)
  bys uniqid childstatus: egen numyearshomeperiod = total(same_state_parent)
  * Number of rows in each life period (largest running index).
  bys uniqid childstatus: egen totalnumyears = max(numofyears)

  * Number of rows observed per person (temp holds the running index and is kept).
  bys uniqid: g temp = _n
  bys uniqid: egen yearsobs = max(temp)

  * Share of rows spent in a parent's state, by life period and overall.
  g propor_home_period = numyearshomeperiod/totalnumyears
  g propor_hom = numyearshome/yearsobs
 
 *** Generating some Lifetime Income variables
 * First and last age at which the person is observed.
 bys uniqid: egen startage = min(age)
 bys uniqid: egen endage = max(age)
 g earliestage = (age == startage)

 ** Total deflated _f income over an age window, only for people first seen at
 ** or before the start of the window and last seen at or after its end:
 ** ages 30 to 40, then ages 25 to 50.
 foreach window in "30 40" "25 50" {
 	local lo : word 1 of `window'
 	local hi : word 2 of `window'
 	cap drop incf_age`lo'to`hi'
 	cap drop temp
 	* temp holds the person total on the in-window rows only ...
 	bys uniqid: egen temp = total(inc_f_adj) if inrange(age, `lo', `hi') & startage <= `lo' & endage >= `hi'
 	* ... and min() spreads it to every row of the person.
 	bys uniqid: egen incf_age`lo'to`hi' = min(temp)
 }

** income during young child period
cap drop incf_yc avgincf_yc incf_yc_imputed
cap drop temp
cap drop temp2
* Person total over the young-child rows (childstatus == 2).
bys uniqid: egen temp = total(inc_f_adj) if childstatus == 2
* Total divided by the running row index; the minimum taken below is the
* total divided by the largest index whenever the total is not negative.
g temp2 = temp/numofyears if childstatus == 2
bys uniqid: egen incf_yc = min(temp)
bys uniqid: egen avgincf_yc = min(temp2)
* Average scaled to a six-year period.
g incf_yc_imputed = 6*avgincf_yc


*** Income in two periods
* Leads and lags are taken by row position within person, in year order.
sort uniqid year
by uniqid: g inc_f_adj_plus2 = inc_f_adj[_n+2]
by uniqid: g inc_f_adj_pre1 = inc_f_adj[_n-1]

* Change from the current row, and from the previous row, to two rows ahead.
g changeinc_f = inc_f_adj_plus2 - inc_f_adj
g changeinc_f_v2 = inc_f_adj_plus2 - inc_f_adj_pre1

* Proportional changes; values above 5 are blanked out.
g pctchange_f = changeinc_f/inc_f_adj
replace pctchange_f = . if pctchange_f > 5
g pctchange_f_v2 = changeinc_f_v2/inc_f_adj_pre1
replace pctchange_f_v2 = . if pctchange_f_v2 > 5

**** Move
* For each kind of move (any state move, move to home, move away from home):
*   <stub>0      = 1 if the move happens on the current row
*   <stub>preK   = the move indicator K rows ahead within person
*   <stub>postK  = the move indicator K rows back within person
sort uniqid year
local stubs   move movehome moveaway
local sources move_state move_tohome move_fromhome
forvalues j = 1/3 {
	local stub : word `j' of `stubs'
	local src  : word `j' of `sources'

	g `stub'0 = (`src' == 1)

	forvalues k = 1/15 {
		by uniqid: g `stub'pre`k' = `src'[_n + `k']
	}

	forvalues k = 1/15 {
		by uniqid: g `stub'post`k' = `src'[_n - `k']
	}
}

* A state move that is neither a move home nor a move away from home.
g moveunrelated0 = (move0 == 1 & movehome0 == 0 & moveaway0 == 0)

save "U:\dtafiles\descriptivessample_PSIDmigration.dta", replace